Dieses Notebook bereitet die Daten für die Intelligent Zoning Engine vor. Es speichert

Bereits in anderen scripten wurde vorbereitet:

Die Daten werden wiefolgt vorbereitet:

TODO: - die sozioökonomischen Faktoren werden aus den Wahlbezirken auf die Blöcke hochgerechnet (https://github.com/berlinermorgenpost/cogran)

Laden der Daten

sampled_buildings = read_rds('output/sampled_buildings.rds')
bez = readOGR('download/RBS_OD_BEZ_2015_12.geojson', layer = 'OGRGeoJSON', stringsAsFactors = FALSE)
OGR data source with driver: GeoJSON 
Source: "download/RBS_OD_BEZ_2015_12.geojson", layer: "OGRGeoJSON"
with 13 features
It has 2 fields
blk = readOGR('download/RBS_OD_BLK_2015_12.geojson', layer = 'OGRGeoJSON', stringsAsFactors = FALSE)
OGR data source with driver: GeoJSON 
Source: "download/RBS_OD_BLK_2015_12.geojson", layer: "OGRGeoJSON"
with 15720 features
It has 4 fields
lor = readOGR('download/RBS_OD_LOR_2015_12.geojson', layer = 'OGRGeoJSON', stringsAsFactors = FALSE)
OGR data source with driver: GeoJSON 
Source: "download/RBS_OD_LOR_2015_12.geojson", layer: "OGRGeoJSON"
with 447 features
It has 8 fields
re_schulstand = readOGR('download/re_schulstand.geojson', layer = 'OGRGeoJSON', stringsAsFactors = FALSE)
OGR data source with driver: GeoJSON 
Source: "download/re_schulstand.geojson", layer: "OGRGeoJSON"
with 709 features
It has 20 fields

Schulwege

route_matrix = read_rds('output/route_matrix.rds')

Schulkapazitäten und Einwohnerzahler auf LOR-Ebene

kapas = read_csv('download/anmeldezahlen.csv') %>% filter(grepl('G', Schulnummer)) %>% filter(!is.na(`Plätze`))
Parsed with column specification:
cols(
  Bezirk = col_character(),
  Schulnummer = col_character(),
  Schulname = col_character(),
  Plätze = col_integer(),
  Anmeldungen = col_character()
)
einwohner_lor = read_delim('download/EWR201512E_Matrix.csv', delim=';')
Parsed with column specification:
cols(
  .default = col_character(),
  ZEIT = col_integer(),
  STADTRAUM = col_integer(),
  E_E = col_number(),
  E_EM = col_number(),
  E_EW = col_number(),
  E_E50_55 = col_number(),
  E_E25U55 = col_number(),
  E_E55U65 = col_number(),
  E_E65U80 = col_number()
)
See spec(...) for full column specifications.

Überprüfung der Vollständigkeit der Daten über Anmeldezahlen/Kapazitäten

re_schulstand_df = re_schulstand %>% as.data.frame() %>% rename(lon=coords.x1, lat=coords.x2)
re_schulstand_df %>% filter(grepl('G', spatial_name)) %>% mutate(BEZIRK=enc2utf8(as.character(BEZIRK))) %>%
  group_by(BEZIRK) %>% summarise(`Anzahl Schulen` = n()) %>%
  rename(Bezirk=BEZIRK) %>% left_join(kapas %>% group_by(Bezirk) %>% summarise(`Mit Kapazität` = n()))
Joining, by = "Bezirk"

Für welche Bezirke haben wir für alle Schulen Kapazitäten gegeben?

re_schulstand %>% as.data.frame() %>% filter(grepl('G', spatial_name)) %>% mutate(BEZIRK=enc2utf8(as.character(BEZIRK))) %>% group_by(BEZIRK) %>% summarise(`Anzahl Schulen` = n()) %>%
  rename(Bezirk=BEZIRK) %>% left_join(kapas %>% group_by(Bezirk) %>% summarise(`Mit Kapazität` = n())) %>% filter(`Anzahl Schulen` == `Mit Kapazität`)
Joining, by = "Bezirk"

Überprüfung ob die Liste der Schulen und Liste der Schulen mit Kapazitätsinformationen gleich sind:

bezirk = 'Tempelhof-Schöneberg'
schulen_mit_kapa = kapas %>% filter(Bezirk == bezirk) %>% .$Schulnummer
schulen_mit_kapa
 [1] "07G01" "07G02" "07G03" "07G05" "07G06" "07G07" "07G10" "07G12"
 [9] "07G13" "07G14" "07G15" "07G16" "07G17" "07G18" "07G19" "07G20"
[17] "07G21" "07G22" "07G23" "07G24" "07G25" "07G26" "07G27" "07G28"
[25] "07G29" "07G30" "07G31" "07G32" "07G34" "07G35" "07G36" "07G37"
07G01

07G02

07G03

07G05

07G06

07G07

07G10

07G12

07G13

07G14

07G15

07G16

07G17

07G18

07G19

07G20

07G21

07G22

07G23

07G24

07G25

07G26

07G27

07G28

07G29

07G30

07G31

07G32

07G34

07G35

07G36

07G37
grundschulen = re_schulstand %>% as.data.frame() %>% filter(grepl('G', spatial_name)) %>% filter(BEZIRK == bezirk) %>% .$spatial_name
'In Anmeldeliste, fehlt in Schulstand'
[1] "In Anmeldeliste, fehlt in Schulstand"
In Anmeldeliste, fehlt in Schulstand
setdiff(schulen_mit_kapa, grundschulen)
character(0)
'In re_schulstand, fehlt in Anmeldeliste'
[1] "In re_schulstand, fehlt in Anmeldeliste"
In re_schulstand, fehlt in Anmeldeliste
setdiff(grundschulen, schulen_mit_kapa)
character(0)
map = get_map('Berlin')
Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=Berlin&zoom=10&size=640x640&scale=2&maptype=terrain&language=en-EN&sensor=false
Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Berlin&sensor=false
re_schulstand_df_w_kapas = re_schulstand_df %>% left_join(kapas, by=c('spatial_name'='Schulnummer'))

Plot aller Schulen, mit der Info, ob Kapazitätsinformationen verfügbar sind.

data = re_schulstand_df_w_kapas %>% filter(grepl('G', spatial_name)) %>% filter(BEZIRK==bezirk) %>% mutate(missing.capa=is.na(`Plätze`))
ggmap(map) + geom_point(aes(lon, lat, color=missing.capa), data=data) +
    coord_map(xlim=c(min(data$lon)-0.01, max(data$lon)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01))

Filter auf Schulen mit Kapazitätsinformationen (für T-S sind das alle):

relevant_schools = re_schulstand_df_w_kapas %>% filter(grepl('G', spatial_name)) %>% filter(BEZIRK==bezirk & !is.na(`Plätze`)) %>% .$spatial_name
relevant_schools
 [1] "07G01" "07G02" "07G03" "07G05" "07G06" "07G07" "07G10" "07G12"
 [9] "07G13" "07G14" "07G15" "07G16" "07G17" "07G18" "07G19" "07G20"
[17] "07G21" "07G22" "07G23" "07G24" "07G25" "07G26" "07G27" "07G28"
[25] "07G29" "07G30" "07G31" "07G32" "07G34" "07G35" "07G36" "07G37"
07G01

07G02

07G03

07G05

07G06

07G07

07G10

07G12

07G13

07G14

07G15

07G16

07G17

07G18

07G19

07G20

07G21

07G22

07G23

07G24

07G25

07G26

07G27

07G28

07G29

07G30

07G31

07G32

07G34

07G35

07G36

07G37

Mapping Bezirk->LOR->Block

df_bez = as.data.frame(bez)
df_lor = as.data.frame(lor)
df_blk = as.data.frame(blk)

Sanity-Check: LORs und Blöcke im Bezirk

bez_id = filter(df_bez, BEZNAME == bezirk)$BEZ
relevant_lors = df_lor %>% filter(BEZ == bez_id)
relevant_blks = df_blk %>% filter(BEZ == bez_id)
ggplot() + geom_path(aes(x=long, y=lat, group=group), data=lor[lor$BEZ == bez_id,]) + coord_map() + geom_path(aes(x=long, y=lat, group=group), data=bez, color='red')
Regions defined for each Polygons
Regions defined for each Polygons

Blöcke im Bezirk

ggplot() + geom_path(aes(x=long, y=lat, group=group), data=blk[blk$BEZ == bez_id,]) + coord_map() + geom_path(aes(x=long, y=lat, group=group), data=bez[bez$BEZ == bez_id,], color='red')
Regions defined for each Polygons
Regions defined for each Polygons

Alter auf Blockebene

Die Struktur der Datei in CSV-Format ist: BLK, 0,1,2,3,4,5,6,7,8,9,10,11,Gesamtergebnis.

einwohner_blk = readOGR('download/03_2_BLK_Einw_Dez2015_Alter.geojson', layer = 'OGRGeoJSON', stringsAsFactors = FALSE)
OGR data source with driver: GeoJSON 
Source: "download/03_2_BLK_Einw_Dez2015_Alter.geojson", layer: "OGRGeoJSON"
with 1165 features
It has 14 fields
data = tidy(blk[blk$BEZ == bez_id,], region='BLK') %>%
  left_join(
    einwohner_blk@data %>%
      set_names(strsplit("BLK,0,1,2,3,4,5,6,7,8,9,10,11,Gesamt", ",")[[1]]) %>%
      gather(age, num, -BLK, -Gesamt)
    , by=c('id'='BLK')
    ) %>%
  filter(!is.na(age)) %>%
  mutate(num=ifelse(num == 0, NA, num), age=factor(as.character(age), levels=as.character(0:11)))

ggmap(map) +
  geom_polygon(aes(x=long, y=lat, group=group, fill=num), data=data) +
  coord_map(xlim=c(min(data$long)-0.01, max(data$long)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01)) +
  facet_wrap(~ age, ncol = 6)

data = tidy(blk[blk$BEZ == bez_id,], region='BLK') %>%
  left_join(
    einwohner_blk@data %>% set_names(strsplit("BLK,0,1,2,3,4,5,6,7,8,9,10,11,Gesamt", ",")[[1]]), by=c('id'='BLK')) %>%
  mutate(kids=(`5`+`6`)/2, kids=ifelse(kids == 0, NA, kids))

data$panel = "5&6"

data2 = tidy(blk[blk$BEZ == bez_id,], region='BLK') %>%
  left_join(
    einwohner_blk@data %>% set_names(strsplit("BLK,0,1,2,3,4,5,6,7,8,9,10,11,Gesamt", ",")[[1]]), by=c('id'='BLK')) %>%
  mutate(kids=(`1`+`2`+`3`+`4`+`5`+`6`)/6, kids=ifelse(kids == 0, NA, kids))

data2$panel = "1-6"

data = rbind(data, data2)

ggmap(map) +
  geom_polygon(aes(x=long, y=lat, group=group, fill=kids), data=data) +
  coord_map(xlim=c(min(data$long)-0.01, max(data$long)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01)) +
  facet_wrap(~ panel)

Berechnung der Population der Blöcke, Strukturquote

Strukturquote = 0.9

kids_in_blks = einwohner_blk@data %>%
  set_names(strsplit("BLK,0,1,2,3,4,5,6,7,8,9,10,11,Gesamt", ",")[[1]]) %>%
  mutate(kids=(`1`+`2`+`3`+`4`+`5`+`6`)/6*Strukturquote) # FIXME only 5 and 6?

row.names(kids_in_blks) = kids_in_blks$BLK

Verfügbare Plätze

relevant_kapas = kapas %>% select(Schulnummer, Kapa=`Plätze`) %>% filter(Schulnummer %in% relevant_schools) %>% as.data.frame()

Überprüfung der Summe der Kapazitäten, Anmeldungen und Kinderstatistiken

'Summe Kapas'
[1] "Summe Kapas"
Summe Kapas
relevant_kapas %>% .$Kapa %>% sum
[1] 2584
'Anmeldungen'
[1] "Anmeldungen"
Anmeldungen
kapas %>% mutate(Anmeldungen = as.numeric(gsub('[^0-9]', '', Anmeldungen))) %>% filter(Schulnummer %in% relevant_schools) %>% .$Anmeldungen %>% sum
[1] 2752
'Kids laut Statistik'
[1] "Kids laut Statistik"
Kids laut Statistik
kids_in_blks$kids %>% sum
[1] 2539.95

Schulwege von Blöcken zu Schulen aggregieren

Für jedes Wohngebäude suchen wir den zugehörigen Block

residential_buildings_blocks = sampled_buildings %>% inner_join(df_blk) %>% filter(BEZ == bez_id)
Joining, by = "BLK"
residential_buildings_blocks
routes_from_blks = residential_buildings_blocks %>%
  inner_join(route_matrix %>% filter(dst %in% relevant_schools), by=c('OI'='src'))
head(routes_from_blks)

Plot der relevanten Blöcke (mit Wohngebäuden) und Population

Welche Blöcke haben Routen( weil Wohngebäude) und eine Population (weil Kinder zwischen 1 und 6)?

polys = tidy(blk[blk$BEZ == bez_id,], region='BLK')
routedata = polys %>% inner_join(routes_from_blks %>% group_by(BLK) %>% summarise(n=n()), by=c('id'='BLK'))
popdata = polys %>% inner_join(kids_in_blks %>% filter(kids > 0), by=c('id'='BLK'))

ggmap(map) +
  geom_polygon(aes(x=long, y=lat, group=group), fill='blue', data=popdata) +
  geom_polygon(aes(x=long, y=lat, group=group), fill='red', data=routedata) +
  coord_map(xlim=c(min(routedata$long)-0.01, max(routedata$long)+0.01), ylim=c(min(routedata$lat)-0.01, max(routedata$lat)+0.01))

ggmap(map) +
  geom_polygon(aes(x=long, y=lat, group=group), fill='red', data=routedata) +
  geom_polygon(aes(x=long, y=lat, group=group), fill='blue', data=popdata) +
  coord_map(xlim=c(min(routedata$long)-0.01, max(routedata$long)+0.01), ylim=c(min(routedata$lat)-0.01, max(routedata$lat)+0.01))

travel_from_blks = routes_from_blks %>% as.data.frame() %>% group_by(BLK, dst) %>% summarise(min=min(distance), avg=mean(distance), med=median(distance), max=max(distance)) %>% ungroup
travel_from_blks

Plot der Blöcke mit Färbung nach durchschnittlichem Weg zur nächsten Schule

data = tidy(blk[blk$BEZ == bez_id,], region='BLK') %>% left_join(travel_from_blks %>% group_by(BLK) %>% top_n(1, -avg), by=c('id'='BLK'))
ggmap(map) + geom_polygon(aes(x=long, y=lat, group=group, fill=-avg), data=data) +
  geom_point(aes(lon, lat), color='red', data = re_schulstand_df %>% filter(BEZIRK==bezirk & SCHULART=='Grundschule')) +
  coord_map(xlim=c(min(data$long)-0.01, max(data$long)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01))

travel_matrix = travel_from_blks %>% select(BLK, dst, avg) %>% spread(dst, avg)
dim(travel_matrix)
[1] 1043   33
travel_matrix

Sozioökonomische Daten

Wir haben Sozioökonomische Daten in den Wahlbezirken. Strategie: - Schneiden der Wahlbezirke mit den Blöcken - Übernahme des Prozentwertes vom Wahlbezirk für jeden (Unter-)block - Zuordnung zu jedem Block und Vereinigung durch Flächen/Wohnhaus-gewichtetes Mittel des Prozentwertes

UWB = readOGR('download/RBS_OD_UWB_AGH2016.geojson', layer = 'OGRGeoJSON', stringsAsFactors = FALSE)
OGR data source with driver: GeoJSON 
Source: "download/RBS_OD_UWB_AGH2016.geojson", layer: "OGRGeoJSON"
with 1779 features
It has 4 fields
UWB$ID = paste0(UWB$BEZ, UWB$UWB)
sozio_UWB = read_excel('download/DL_BE_AH2016_Strukturdaten.xlsx', sheet = 3) %>% select(ID, sgbIIu65=`Einwohner unter 65 in SGB II 2014 Prozent`)
UWB = UWB %>% sp::merge(sozio_UWB, by='ID')

ggplot(broom::tidy(UWB, region='ID') %>% inner_join(UWB %>% as.data.frame, by=c('id'='ID'))) + geom_polygon(aes(x=long, y=lat, group=group, fill=sgbIIu65)) + coord_map()

Check for self intersections

wgs84 = CRS(proj4string(blk))
ea_projection = CRS("+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs")
blk_in_bez = blk[blk$BEZ == bez_id,]
gIsValid(blk_in_bez)
[1] TRUE
blk_in_bez = gBuffer(spTransform(blk_in_bez, ea_projection), byid=T, width = -0.1)
any(xor(gIntersects(blk_in_bez, byid=T, returnDense = T), diag(1, length(blk_in_bez)) == 1))
[1] FALSE
plot(blk_in_bez)

#gIntersects(UWB[UWB$BEZ == '07',], byid=TRUE)
uwb_in_bez = gBuffer(spTransform(UWB[UWB$BEZ == '07',], ea_projection), byid=T, width=-0.1)
gIsValid(uwb_in_bez)
[1] TRUE
any(xor(gIntersects(uwb_in_bez, byid=T, returnDense = T), diag(1, length(uwb_in_bez)) == 1))
[1] FALSE
plot(uwb_in_bez)

intersection = gIntersection(uwb_in_bez, blk_in_bez, byid = T, drop_lower_td = T)

gIsValid(intersection)
[1] TRUE
plot(intersection)

intersection_uwb_data = intersection %>% over(uwb_in_bez)
intersection_blk_data = intersection %>% over(blk_in_bez)
intersection_area = gArea(intersection, byid=T)
intersection_data = cbind(
    intersection_uwb_data %>% select(UWB_ID=ID, sgbIIu65),
    intersection_blk_data %>% select(BLK, BLK_Einw=Einw),
    data.frame(area=intersection_area) # FIXME how else to normalize?
    )
length(intersection)
[1] 1219
nrow(blk_in_bez)
[1] 1201
# did we miss any blocks?
setdiff(blk_in_bez$BLK, intersection_data$BLK)
character(0)

Pro Block mische die SGB-Werte der Unterblöcke gewichtet nach Fläche. FIXME - besser nach Anzahl der Wohnhäuser?

sgbII_blk = intersection_data %>%
  group_by(BLK) %>%
  summarise(sgbIIu65=sum(sgbIIu65*area)/sum(area)/100)
ggplot(broom::tidy(spTransform(blk_in_bez, wgs84), region='BLK') %>% left_join(sgbII_blk, by=c('id'='BLK'))) + geom_polygon(aes(x=long, y=lat, group=group, fill=sgbIIu65)) + coord_map()

Adjazenz von Blöcken

row.names(blk_in_bez) = blk_in_bez$BLK
buffeded_blk_in_bez = gBuffer(blk_in_bez, byid=T, width=40)
adjacency = gIntersects(gBuffer(blk_in_bez, byid=T, width=40), byid = T)
rownames(adjacency) = blk_in_bez$BLK
colnames(adjacency) = blk_in_bez$BLK

adjacency_df = adjacency %>% as.data.frame() %>% mutate(from=rownames(.)) %>%
  gather(to, connected, -from) %>% filter(connected) %>%
  inner_join(spTransform(blk_in_bez, wgs84) %>% coordinates() %>% as.data.frame() %>% rename(from_long=V1, from_lat=V2) %>% mutate(from=rownames(.))) %>%
  inner_join(spTransform(blk_in_bez, wgs84) %>% coordinates() %>% as.data.frame() %>% rename(to_long=V1, to_lat=V2) %>% mutate(to=rownames(.)))
Joining, by = "from"
Joining, by = "to"
ggplot() +
  geom_polygon(aes(x=long, y=lat, group=group), fill='gray', data=broom::tidy(spTransform(blk_in_bez, wgs84), region='BLK')) + 
  geom_segment(aes(x=from_long, y=from_lat, xend=to_long, yend=to_lat), size=0.1, color='black', data=adjacency_df) +
  theme_nothing() + coord_map()
Warning: `panel.margin` is deprecated. Please use `panel.spacing` property
instead

ggsave('figs/adjacency.pdf')
Saving 7 x 5 in image
adjacency_df %>% filter(connected & from != to) %>% select(from, to) %>% write_csv('app/data/adjacency.csv')

Relevante Daten auswählen

optim_kapas = relevant_kapas
optim_kids_in_blks = kids_in_blks %>% filter(kids > 0) %>% inner_join(travel_matrix, by='BLK') %>% select(BLK, kids)
nrow(optim_kids_in_blks)
[1] 966
nrow(optim_kapas)
[1] 32
select_schools = as.character(optim_kapas$Schulnummer)
select_blks = as.character(optim_kids_in_blks$BLK)

optim_matrix = inner_join(optim_kids_in_blks, travel_matrix, by='BLK')[select_schools]

dim(optim_matrix)
[1] 966  32
optim_kapas$Kapa %>% sum
[1] 2584
optim_kids_in_blks$kids %>% sum
[1] 2538.15

Naive (initiale) Zuordnung: Jeder Block zur nächsten Schule

solution = optim_matrix %>% mutate(BLK=optim_kids_in_blks$BLK) %>% gather(school, dist, -BLK) %>% group_by(BLK) %>% top_n(1, -dist) %>% ungroup

optim_matrix %>% t %>% as.data.frame %>% summarise_each(funs(min)) %>% sum()
[1] 736677.2
solines = re_schulstand_df %>% inner_join(solution, by=c('spatial_name'='school')) %>% inner_join(cbind(as.data.frame(coordinates(blk[blk$BEZ == bez_id,])), blk[blk$BEZ == bez_id,]@data['BLK']))
Joining, by = "BLK"
data = tidy(blk[blk$BEZ == bez_id,], region='BLK') %>% left_join(solution, by=c('id'='BLK'))
ggmap(map, darken = c(0.5, 'white')) + geom_polygon(aes(x=long, y=lat, group=group, fill=school), data=data) +
  geom_segment(aes(x=V1,y=V2,xend=lon,yend=lat), data=solines, size=0.3) +
  geom_point(aes(lon, lat), color='black', size=2, data = re_schulstand_df %>% inner_join(solution, by=c('spatial_name'='school'))) +
  geom_point(aes(lon, lat, color=spatial_name), data = re_schulstand_df %>% inner_join(solution, by=c('spatial_name'='school'))) +
  coord_map(xlim=c(min(data$long)-0.01, max(data$long)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01)) +
  guides(color=F, fill=F)

Darstellung der Zuordnung als Tabelle

library(formattable)
solution %>% inner_join(optim_kids_in_blks, by='BLK') %>% inner_join(travel_from_blks, by=c('BLK'='BLK', 'school'='dst')) %>%
  group_by(school) %>% summarise(
    kids=sum(kids),
    num_blocks=n(),
    min_dist=min(min),
    avg_dist=mean((kids*avg)/sum(kids)),
    max_dist=max(max)
  ) %>%
  inner_join(relevant_kapas, by=c('school'='Schulnummer')) %>%
  mutate(
    utilization=kids/Kapa
  ) %>% select(
   Schule=school,
   `Blöcke`=num_blocks,
   Kapazität=Kapa,
   Kinder=kids,
   Auslastung=utilization,
   `Weg (min)`=min_dist,
   `Weg (Ø)`=avg_dist,
   `Weg (max)`=max_dist
  ) %>%
  formattable(
    list(
      Kinder = formatter("span", x ~ digits(x, 2)),
      Auslastung = formatter("span",
        style = x ~ style(color = ifelse(x < 1, "green", "red")),
        x ~ icontext(ifelse(x < 1, "ok", "remove"), percent(x))
      ),
      `Weg (Ø)` = proportion_bar("lightblue"),
      `Weg (min)` = proportion_bar("lightblue"),
      `Weg (max)` = proportion_bar("lightblue")
    )
  )
Schule Blöcke Kapazität Kinder Auslastung Weg (min) Weg (Ø) Weg (max)
07G01 15 66 85.05 128.86% 181.547440 700.0492 1346.5031
07G02 19 98 45.30 46.22% 105.499832 623.1903 1108.7021
07G03 17 73 54.45 74.59% 15.600883 367.5294 845.7204
07G05 19 78 94.50 121.15% 85.897629 601.2110 986.3845
07G06 27 55 84.75 154.09% 14.026745 797.5172 1234.1342
07G07 11 81 27.60 34.07% 43.258247 690.1380 1806.2324
07G10 26 112 146.55 130.85% 61.768867 607.5237 1464.8206
07G12 12 75 31.20 41.60% 130.699646 411.4655 674.6335
07G13 24 82 113.85 138.84% 25.176670 450.8940 1067.4244
07G14 31 78 70.35 90.19% 16.395737 402.8689 768.4520
07G15 51 104 184.20 177.12% 3.375702 808.5565 1474.6599
07G16 22 104 58.65 56.39% 22.915071 496.8528 944.5726
07G17 22 100 57.00 57.00% 88.050331 388.6880 695.9189
07G18 18 44 64.35 146.25% 2.263283 354.9368 662.2927
07G19 34 112 126.60 113.04% 2.990739 1094.2487 2207.5225
07G20 42 81 139.50 172.22% 110.340210 696.1919 1704.6410
07G21 24 72 79.20 110.00% 222.134750 545.1009 1511.8127
07G22 25 91 54.15 59.51% 3.750214 569.9378 1353.2026
07G23 8 50 15.75 31.50% 65.594955 708.3703 1333.5781
07G24 27 75 73.35 97.80% 92.837883 641.5522 1421.1660
07G25 37 75 141.75 189.00% 92.507385 652.2811 1237.5529
07G26 20 52 18.00 34.62% 57.143013 524.2916 896.1053
07G27 16 75 46.65 62.20% 95.367569 696.7090 1357.3337
07G28 53 75 78.60 104.80% 134.617737 1025.2054 2809.9302
07G29 93 104 104.25 100.24% 123.937210 928.5043 2608.2744
07G30 38 72 53.40 74.17% 101.384521 810.4917 2524.4463
07G31 18 78 46.80 60.00% 198.014374 871.0089 1560.6522
07G32 56 78 55.05 70.58% 7.506210 755.1895 1366.1006
07G34 32 100 155.40 155.40% 0.000000 1092.8906 2080.1841
07G35 34 75 90.00 120.00% 160.449890 789.4181 1531.3287
07G36 40 100 58.80 58.80% 144.836456 920.1215 2250.3574
07G37 55 69 83.10 120.43% 142.399994 1196.1082 2097.5693

ndH und LMB-Daten (nicht offen)

ndh_lmb = read_excel('download/LMB_ndH2015.xlsx') %>% select(
  entity_id=BSN,
  ndH=`Anteil ndH`,
  LMB=`Anteil LMB`
)
ndh_lmb

Daten für die App speichern

Neue Daten

Entities / Schulen

entities = subset(re_schulstand_df, spatial_name %in% relevant_schools) %>%
  rename(entity_id = spatial_name) %>%
  select(-gml_id, -spatial_alias, -spatial_type) %>%
  inner_join(rename(relevant_kapas, capacity=Kapa), by=c('entity_id'='Schulnummer')) %>%
  left_join(ndh_lmb, by=c('entity_id'))
coordinates(entities) = ~ lon + lat
proj4string(entities) = CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")
file.remove('app/data/entities.geojson')
[1] TRUE
entities %>% writeOGR('app/data/entities.geojson', layer="entities", driver="GeoJSON", check_exists=F)
entities@data %>% select(entity_id, capacity) %>% write_csv('app/data/entities.csv')

Units / Statistische Blöcke

units = subset(blk, BEZ == bez_id)

units@data$PLR = NULL
#units@data$Einw = NULL
units@data$BEZ = NULL
units@data$unit_id = units@data$BLK
units@data$BLK = NULL

units = units %>% sp::merge(sgbII_blk %>% rename(unit_id=BLK)) %>%
  sp::merge(optim_kids_in_blks %>% select(unit_id=BLK, population=kids))

file.remove('app/data/units.geojson')
[1] TRUE
units %>% writeOGR('app/data/units.geojson', layer="entities", driver="GeoJSON", check_exists=F)
units@data %>% write_csv('app/data/units.csv')

Zuordnung

assignment = solution %>% select(unit_id=BLK, entity_id=school)
assignment %>% write_csv('app/data/assignment.csv')

Weights / Schulwege

travel_from_blks %>%
  rename(unit_id=BLK, entity_id=dst) %>%
  #gather(weight, value, -unit_id, -entity_id) %>%
  write_csv('app/data/weights.csv')

Zusätzliche Daten

file.copy('download/RBS_OD_BEZ_2015_12.geojson', 'app/data/RBS_OD_BEZ_2015_12.geojson')
[1] FALSE
---
title: "R Notebook"
output: html_notebook
---

```{r libs, include=F, warning=F}
library(readr)
library(readxl)
library(rgdal)
library(dplyr)
library(tidyr)
library(ggplot2)
library(ggmap)
library(purrr)
library(knitr)
library(broom)
library(maptools)
library(rgeos)
```

Dieses Notebook bereitet die Daten für die Intelligent Zoning Engine vor. Es speichert

- entities.geojson — Schulen, deren Geokoordinaten und Attribute: entity_id, capacity, und andere Attribute
- entities.csv — Statistische Blöcke Berlins und optimierungsrelevante Attribute: entity_id, capacity
- units.geojson — Statistische Blöcke Berlins, deren Geometrie und Attribute
- units.csv — Statsitische Blöcke Berlins und optimierungsrelevante Attribute: unit_id, population, percentage_sgb
- weights.csv — optimierungsrelevante Gewichte wie Fußwege / Spalten: entity_id, unit_id, weight, value
- assignment.csv — eine initiale Zuordnung / Spalten: unit_id, entity_id

Bereits in anderen scripten wurde vorbereitet:

- Fußwegen von einer großen Sichprobe von (_Wohn_-)Gebäuden zu allen Schulen wurden berechnet und in `route_matrix.csv` gespeichert
- Die Stichprobe wurde in `sampled_buildings.csv` gespeichert

Die Daten werden wiefolgt vorbereitet:

- pro Block werden die Anzahl der einzuschulenden Kinder mit Hilfe der Einwohnerzahlen nach Alter auf LOR-Ebene in `EWR201512E_Matrix.csv` hochgerechnet
    - Kinder des LOR werden Anteilig nach Einwohnerzahl des Blocks im Verhältnis zum LOR auf die Blöcke verteilt
- es werden minimale, durchschnittliche und maximale Fußwege aus jedem Block errechnet

TODO:
- die sozioökonomischen Faktoren werden aus den Wahlbezirken auf die Blöcke hochgerechnet (https://github.com/berlinermorgenpost/cogran)

## Laden der Daten

```{r load data, message=FALSE, warning=FALSE}
sampled_buildings = read_rds('output/sampled_buildings.rds')
bez = readOGR('download/RBS_OD_BEZ_2015_12.geojson', layer = 'OGRGeoJSON', stringsAsFactors = FALSE)
blk = readOGR('download/RBS_OD_BLK_2015_12.geojson', layer = 'OGRGeoJSON', stringsAsFactors = FALSE)
lor = readOGR('download/RBS_OD_LOR_2015_12.geojson', layer = 'OGRGeoJSON', stringsAsFactors = FALSE)
re_schulstand = readOGR('download/re_schulstand.geojson', layer = 'OGRGeoJSON', stringsAsFactors = FALSE)
```

### Schulwege

```{r}
route_matrix = read_rds('output/route_matrix.rds')
```

### Schulkapazitäten und Einwohnerzahler auf LOR-Ebene

```{r}
kapas = read_csv('download/anmeldezahlen.csv') %>% filter(grepl('G', Schulnummer)) %>% filter(!is.na(`Plätze`))
einwohner_lor = read_delim('download/EWR201512E_Matrix.csv', delim=';')
```

## Überprüfung der Vollständigkeit der Daten über Anmeldezahlen/Kapazitäten

```{r}
re_schulstand_df = re_schulstand %>% as.data.frame() %>% rename(lon=coords.x1, lat=coords.x2)
re_schulstand_df %>% filter(grepl('G', spatial_name)) %>% mutate(BEZIRK=enc2utf8(as.character(BEZIRK))) %>%
  group_by(BEZIRK) %>% summarise(`Anzahl Schulen` = n()) %>%
  rename(Bezirk=BEZIRK) %>% left_join(kapas %>% group_by(Bezirk) %>% summarise(`Mit Kapazität` = n()))
```

Für welche Bezirke haben wir für alle Schulen Kapazitäten gegeben?

```{r}
re_schulstand %>% as.data.frame() %>% filter(grepl('G', spatial_name)) %>% mutate(BEZIRK=enc2utf8(as.character(BEZIRK))) %>% group_by(BEZIRK) %>% summarise(`Anzahl Schulen` = n()) %>%
  rename(Bezirk=BEZIRK) %>% left_join(kapas %>% group_by(Bezirk) %>% summarise(`Mit Kapazität` = n())) %>% filter(`Anzahl Schulen` == `Mit Kapazität`)
```

Überprüfung ob die Liste der Schulen und Liste der Schulen mit Kapazitätsinformationen gleich sind:

```{r}
bezirk = 'Tempelhof-Schöneberg'
schulen_mit_kapa = kapas %>% filter(Bezirk == bezirk) %>% .$Schulnummer
schulen_mit_kapa
grundschulen = re_schulstand %>% as.data.frame() %>% filter(grepl('G', spatial_name)) %>% filter(BEZIRK == bezirk) %>% .$spatial_name
'In Anmeldeliste, fehlt in Schulstand'
setdiff(schulen_mit_kapa, grundschulen)
'In re_schulstand, fehlt in Anmeldeliste'
setdiff(grundschulen, schulen_mit_kapa)
```


```{r}
map = get_map('Berlin')
```


```{r}
re_schulstand_df_w_kapas = re_schulstand_df %>% left_join(kapas, by=c('spatial_name'='Schulnummer'))
```

Plot aller Schulen, mit der Info, ob Kapazitätsinformationen verfügbar sind.

```{r}
data = re_schulstand_df_w_kapas %>% filter(grepl('G', spatial_name)) %>% filter(BEZIRK==bezirk) %>% mutate(missing.capa=is.na(`Plätze`))
ggmap(map) + geom_point(aes(lon, lat, color=missing.capa), data=data) +
    coord_map(xlim=c(min(data$lon)-0.01, max(data$lon)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01))
```

Filter auf Schulen mit Kapazitätsinformationen (für T-S sind das alle):

```{r}
relevant_schools = re_schulstand_df_w_kapas %>% filter(grepl('G', spatial_name)) %>% filter(BEZIRK==bezirk & !is.na(`Plätze`)) %>% .$spatial_name
relevant_schools
```

## Mapping Bezirk->LOR->Block

```{r}
df_bez = as.data.frame(bez)
df_lor = as.data.frame(lor)
df_blk = as.data.frame(blk)
```

### Sanity-Check: LORs und Blöcke im Bezirk

```{r}
bez_id = filter(df_bez, BEZNAME == bezirk)$BEZ
relevant_lors = df_lor %>% filter(BEZ == bez_id)
relevant_blks = df_blk %>% filter(BEZ == bez_id)
```

```{r}
ggplot() + geom_path(aes(x=long, y=lat, group=group), data=lor[lor$BEZ == bez_id,]) + coord_map() + geom_path(aes(x=long, y=lat, group=group), data=bez, color='red')
```

### Blöcke im Bezirk

```{r}
ggplot() + geom_path(aes(x=long, y=lat, group=group), data=blk[blk$BEZ == bez_id,]) + coord_map() + geom_path(aes(x=long, y=lat, group=group), data=bez[bez$BEZ == bez_id,], color='red')
```

## Alter auf Blockebene

Die Struktur der Datei in CSV-Format ist: BLK, 0,1,2,3,4,5,6,7,8,9,10,11,Gesamtergebnis.

```{r}
einwohner_blk = readOGR('download/03_2_BLK_Einw_Dez2015_Alter.geojson', layer = 'OGRGeoJSON', stringsAsFactors = FALSE)
```

```{r}
data = tidy(blk[blk$BEZ == bez_id,], region='BLK') %>%
  left_join(
    einwohner_blk@data %>%
      set_names(strsplit("BLK,0,1,2,3,4,5,6,7,8,9,10,11,Gesamt", ",")[[1]]) %>%
      gather(age, num, -BLK, -Gesamt)
    , by=c('id'='BLK')
    ) %>%
  filter(!is.na(age)) %>%
  mutate(num=ifelse(num == 0, NA, num), age=factor(as.character(age), levels=as.character(0:11)))

ggmap(map) +
  geom_polygon(aes(x=long, y=lat, group=group, fill=num), data=data) +
  coord_map(xlim=c(min(data$long)-0.01, max(data$long)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01)) +
  facet_wrap(~ age, ncol = 6)
```


```{r}
data = tidy(blk[blk$BEZ == bez_id,], region='BLK') %>%
  left_join(
    einwohner_blk@data %>% set_names(strsplit("BLK,0,1,2,3,4,5,6,7,8,9,10,11,Gesamt", ",")[[1]]), by=c('id'='BLK')) %>%
  mutate(kids=(`5`+`6`)/2, kids=ifelse(kids == 0, NA, kids))

data$panel = "5&6"

data2 = tidy(blk[blk$BEZ == bez_id,], region='BLK') %>%
  left_join(
    einwohner_blk@data %>% set_names(strsplit("BLK,0,1,2,3,4,5,6,7,8,9,10,11,Gesamt", ",")[[1]]), by=c('id'='BLK')) %>%
  mutate(kids=(`1`+`2`+`3`+`4`+`5`+`6`)/6, kids=ifelse(kids == 0, NA, kids))

data2$panel = "1-6"

data = rbind(data, data2)

ggmap(map) +
  geom_polygon(aes(x=long, y=lat, group=group, fill=kids), data=data) +
  coord_map(xlim=c(min(data$long)-0.01, max(data$long)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01)) +
  facet_wrap(~ panel)
```


### Berechnung der Population der Blöcke, Strukturquote

```{r}
Strukturquote = 0.9

kids_in_blks = einwohner_blk@data %>%
  set_names(strsplit("BLK,0,1,2,3,4,5,6,7,8,9,10,11,Gesamt", ",")[[1]]) %>%
  mutate(kids=(`1`+`2`+`3`+`4`+`5`+`6`)/6*Strukturquote) # FIXME only 5 and 6?

row.names(kids_in_blks) = kids_in_blks$BLK
```

## Verfügbare Plätze

```{r}
relevant_kapas = kapas %>% select(Schulnummer, Kapa=`Plätze`) %>% filter(Schulnummer %in% relevant_schools) %>% as.data.frame()
```

### Überprüfung der Summe der Kapazitäten, Anmeldungen und Kinderstatistiken

```{r}
'Summe Kapas'
relevant_kapas %>% .$Kapa %>% sum
'Anmeldungen'
kapas %>% mutate(Anmeldungen = as.numeric(gsub('[^0-9]', '', Anmeldungen))) %>% filter(Schulnummer %in% relevant_schools) %>% .$Anmeldungen %>% sum
'Kids laut Statistik'
kids_in_blks$kids %>% sum
```

## Schulwege von Blöcken zu Schulen aggregieren

Für jedes Wohngebäude suchen wir den zugehörigen Block

```{r}
residential_buildings_blocks = sampled_buildings %>% inner_join(df_blk) %>% filter(BEZ == bez_id)
residential_buildings_blocks
```

```{r}
routes_from_blks = residential_buildings_blocks %>%
  inner_join(route_matrix %>% filter(dst %in% relevant_schools), by=c('OI'='src'))
head(routes_from_blks)
```

### Plot der relevanten Blöcke (mit Wohngebäuden) und Population

Welche Blöcke haben Routen( weil Wohngebäude) und eine Population (weil Kinder zwischen 1 und 6)?

```{r}
polys = tidy(blk[blk$BEZ == bez_id,], region='BLK')
routedata = polys %>% inner_join(routes_from_blks %>% group_by(BLK) %>% summarise(n=n()), by=c('id'='BLK'))
popdata = polys %>% inner_join(kids_in_blks %>% filter(kids > 0), by=c('id'='BLK'))

ggmap(map) +
  geom_polygon(aes(x=long, y=lat, group=group), fill='blue', data=popdata) +
  geom_polygon(aes(x=long, y=lat, group=group), fill='red', data=routedata) +
  coord_map(xlim=c(min(routedata$long)-0.01, max(routedata$long)+0.01), ylim=c(min(routedata$lat)-0.01, max(routedata$lat)+0.01))

ggmap(map) +
  geom_polygon(aes(x=long, y=lat, group=group), fill='red', data=routedata) +
  geom_polygon(aes(x=long, y=lat, group=group), fill='blue', data=popdata) +
  coord_map(xlim=c(min(routedata$long)-0.01, max(routedata$long)+0.01), ylim=c(min(routedata$lat)-0.01, max(routedata$lat)+0.01))

```


```{r}
travel_from_blks = routes_from_blks %>% as.data.frame() %>% group_by(BLK, dst) %>% summarise(min=min(distance), avg=mean(distance), med=median(distance), max=max(distance)) %>% ungroup
travel_from_blks
```

### Plot der Blöcke mit Färbung nach durchschnittlichem Weg zur nächsten Schule

```{r}
data = tidy(blk[blk$BEZ == bez_id,], region='BLK') %>% left_join(travel_from_blks %>% group_by(BLK) %>% top_n(1, -avg), by=c('id'='BLK'))
ggmap(map) + geom_polygon(aes(x=long, y=lat, group=group, fill=-avg), data=data) +
  geom_point(aes(lon, lat), color='red', data = re_schulstand_df %>% filter(BEZIRK==bezirk & SCHULART=='Grundschule')) +
  coord_map(xlim=c(min(data$long)-0.01, max(data$long)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01))
```


```{r}
travel_matrix = travel_from_blks %>% select(BLK, dst, avg) %>% spread(dst, avg)
dim(travel_matrix)
travel_matrix
```

## Sozioökonomische Daten

Wir haben Sozioökonomische Daten in den Wahlbezirken. Strategie:
- Schneiden der Wahlbezirke mit den Blöcken
- Übernahme des Prozentwertes vom Wahlbezirk für jeden (Unter-)block
- Zuordnung zu jedem Block und Vereinigung durch Flächen/Wohnhaus-gewichtetes Mittel des Prozentwertes 

```{r}
UWB = readOGR('download/RBS_OD_UWB_AGH2016.geojson', layer = 'OGRGeoJSON', stringsAsFactors = FALSE)
UWB$ID = paste0(UWB$BEZ, UWB$UWB)
sozio_UWB = read_excel('download/DL_BE_AH2016_Strukturdaten.xlsx', sheet = 3) %>% select(ID, sgbIIu65=`Einwohner unter 65 in SGB II 2014 Prozent`)
UWB = UWB %>% sp::merge(sozio_UWB, by='ID')

ggplot(broom::tidy(UWB, region='ID') %>% inner_join(UWB %>% as.data.frame, by=c('id'='ID'))) + geom_polygon(aes(x=long, y=lat, group=group, fill=sgbIIu65)) + coord_map()
```

Check for self intersections
```{r}
wgs84 = CRS(proj4string(blk))
ea_projection = CRS("+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs")
blk_in_bez = blk[blk$BEZ == bez_id,]
gIsValid(blk_in_bez)
blk_in_bez = gBuffer(spTransform(blk_in_bez, ea_projection), byid=T, width = -0.1)
any(xor(gIntersects(blk_in_bez, byid=T, returnDense = T), diag(1, length(blk_in_bez)) == 1))
plot(blk_in_bez)
#gIntersects(UWB[UWB$BEZ == '07',], byid=TRUE)
```

```{r}
uwb_in_bez = gBuffer(spTransform(UWB[UWB$BEZ == '07',], ea_projection), byid=T, width=-0.1)
gIsValid(uwb_in_bez)
any(xor(gIntersects(uwb_in_bez, byid=T, returnDense = T), diag(1, length(uwb_in_bez)) == 1))
plot(uwb_in_bez)
```

```{r}
intersection = gIntersection(uwb_in_bez, blk_in_bez, byid = T, drop_lower_td = T)

gIsValid(intersection)
plot(intersection)
```

```{r}
intersection_uwb_data = intersection %>% over(uwb_in_bez)
intersection_blk_data = intersection %>% over(blk_in_bez)
intersection_area = gArea(intersection, byid=T)
intersection_data = cbind(
    intersection_uwb_data %>% select(UWB_ID=ID, sgbIIu65),
    intersection_blk_data %>% select(BLK, BLK_Einw=Einw),
    data.frame(area=intersection_area) # FIXME how else to normalize?
    )
```

```{r}
length(intersection)
nrow(blk_in_bez)
# did we miss any blocks?
setdiff(blk_in_bez$BLK, intersection_data$BLK)
```

Pro Block mische die SGB-Werte der Unterblöcke gewichtet nach Fläche.
FIXME - besser nach Anzahl der Wohnhäuser?
```{r}
sgbII_blk = intersection_data %>%
  group_by(BLK) %>%
  summarise(sgbIIu65=sum(sgbIIu65*area)/sum(area)/100)
```


```{r}
ggplot(broom::tidy(spTransform(blk_in_bez, wgs84), region='BLK') %>% left_join(sgbII_blk, by=c('id'='BLK'))) + geom_polygon(aes(x=long, y=lat, group=group, fill=sgbIIu65)) + coord_map()
```

## Adjazenz von Blöcken

```{r}
row.names(blk_in_bez) = blk_in_bez$BLK
buffeded_blk_in_bez = gBuffer(blk_in_bez, byid=T, width=40)
adjacency = gIntersects(gBuffer(blk_in_bez, byid=T, width=40), byid = T)
rownames(adjacency) = blk_in_bez$BLK
colnames(adjacency) = blk_in_bez$BLK

adjacency_df = adjacency %>% as.data.frame() %>% mutate(from=rownames(.)) %>%
  gather(to, connected, -from) %>% filter(connected) %>%
  inner_join(spTransform(blk_in_bez, wgs84) %>% coordinates() %>% as.data.frame() %>% rename(from_long=V1, from_lat=V2) %>% mutate(from=rownames(.))) %>%
  inner_join(spTransform(blk_in_bez, wgs84) %>% coordinates() %>% as.data.frame() %>% rename(to_long=V1, to_lat=V2) %>% mutate(to=rownames(.)))

ggplot() +
  geom_polygon(aes(x=long, y=lat, group=group), fill='gray', data=broom::tidy(spTransform(blk_in_bez, wgs84), region='BLK')) + 
  geom_segment(aes(x=from_long, y=from_lat, xend=to_long, yend=to_lat), size=0.1, color='black', data=adjacency_df) +
  theme_nothing() + coord_map()

ggsave('figs/adjacency.pdf')
adjacency_df %>% filter(connected & from != to) %>% select(from, to) %>% write_csv('app/data/adjacency.csv')
```


## Relevante Daten auswählen

```{r}
optim_kapas = relevant_kapas
optim_kids_in_blks = kids_in_blks %>% filter(kids > 0) %>% inner_join(travel_matrix, by='BLK') %>% select(BLK, kids)
nrow(optim_kids_in_blks)
nrow(optim_kapas)

select_schools = as.character(optim_kapas$Schulnummer)
select_blks = as.character(optim_kids_in_blks$BLK)

optim_matrix = inner_join(optim_kids_in_blks, travel_matrix, by='BLK')[select_schools]

dim(optim_matrix)

optim_kapas$Kapa %>% sum
optim_kids_in_blks$kids %>% sum
```

## Naive (initiale) Zuordnung: Jeder Block zur nächsten Schule

```{r}
solution = optim_matrix %>% mutate(BLK=optim_kids_in_blks$BLK) %>% gather(school, dist, -BLK) %>% group_by(BLK) %>% top_n(1, -dist) %>% ungroup

optim_matrix %>% t %>% as.data.frame %>% summarise_each(funs(min)) %>% sum()

solines = re_schulstand_df %>% inner_join(solution, by=c('spatial_name'='school')) %>% inner_join(cbind(as.data.frame(coordinates(blk[blk$BEZ == bez_id,])), blk[blk$BEZ == bez_id,]@data['BLK']))

data = tidy(blk[blk$BEZ == bez_id,], region='BLK') %>% left_join(solution, by=c('id'='BLK'))
ggmap(map, darken = c(0.5, 'white')) + geom_polygon(aes(x=long, y=lat, group=group, fill=school), data=data) +
  geom_segment(aes(x=V1,y=V2,xend=lon,yend=lat), data=solines, size=0.3) +
  geom_point(aes(lon, lat), color='black', size=2, data = re_schulstand_df %>% inner_join(solution, by=c('spatial_name'='school'))) +
  geom_point(aes(lon, lat, color=spatial_name), data = re_schulstand_df %>% inner_join(solution, by=c('spatial_name'='school'))) +
  coord_map(xlim=c(min(data$long)-0.01, max(data$long)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01)) +
  guides(color=F, fill=F)
```

## Darstellung der Zuordnung als Tabelle

```{r}
library(formattable)
```

```{r}
solution %>% inner_join(optim_kids_in_blks, by='BLK') %>% inner_join(travel_from_blks, by=c('BLK'='BLK', 'school'='dst')) %>%
  group_by(school) %>% summarise(
    kids=sum(kids),
    num_blocks=n(),
    min_dist=min(min),
    avg_dist=mean((kids*avg)/sum(kids)),
    max_dist=max(max)
  ) %>%
  inner_join(relevant_kapas, by=c('school'='Schulnummer')) %>%
  mutate(
    utilization=kids/Kapa
  ) %>% select(
   Schule=school,
   `Blöcke`=num_blocks,
   Kapazität=Kapa,
   Kinder=kids,
   Auslastung=utilization,
   `Weg (min)`=min_dist,
   `Weg (Ø)`=avg_dist,
   `Weg (max)`=max_dist
  ) %>%
  formattable(
    list(
      Kinder = formatter("span", x ~ digits(x, 2)),
      Auslastung = formatter("span",
        style = x ~ style(color = ifelse(x < 1, "green", "red")),
        x ~ icontext(ifelse(x < 1, "ok", "remove"), percent(x))
      ),
      `Weg (Ø)` = proportion_bar("lightblue"),
      `Weg (min)` = proportion_bar("lightblue"),
      `Weg (max)` = proportion_bar("lightblue")
    )
  )
```

## ndH und LMB-Daten (nicht offen)

```{r}
ndh_lmb = read_excel('download/LMB_ndH2015.xlsx') %>% select(
  entity_id=BSN,
  ndH=`Anteil ndH`,
  LMB=`Anteil LMB`
)
ndh_lmb
```


## Daten für die App speichern

- entities.geojson
- entities.csv
- units.geojson
- units.csv
- weights.csv
- assignment.csv

### Neue Daten


#### Entities / Schulen

```{r}
entities = subset(re_schulstand_df, spatial_name %in% relevant_schools) %>%
  rename(entity_id = spatial_name) %>%
  select(-gml_id, -spatial_alias, -spatial_type) %>%
  inner_join(rename(relevant_kapas, capacity=Kapa), by=c('entity_id'='Schulnummer')) %>%
  left_join(ndh_lmb, by=c('entity_id'))
coordinates(entities) = ~ lon + lat
proj4string(entities) = CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")
file.remove('app/data/entities.geojson')
entities %>% writeOGR('app/data/entities.geojson', layer="entities", driver="GeoJSON", check_exists=F)
entities@data %>% select(entity_id, capacity) %>% write_csv('app/data/entities.csv')
```

#### Units / Statistische Blöcke

```{r}
units = subset(blk, BEZ == bez_id)

units@data$PLR = NULL
#units@data$Einw = NULL
units@data$BEZ = NULL
units@data$unit_id = units@data$BLK
units@data$BLK = NULL

units = units %>% sp::merge(sgbII_blk %>% rename(unit_id=BLK)) %>%
  sp::merge(optim_kids_in_blks %>% select(unit_id=BLK, population=kids))

file.remove('app/data/units.geojson')
units %>% writeOGR('app/data/units.geojson', layer="entities", driver="GeoJSON", check_exists=F)
units@data %>% write_csv('app/data/units.csv')
```

#### Zuordnung

```{r}
assignment = solution %>% select(unit_id=BLK, entity_id=school)
assignment %>% write_csv('app/data/assignment.csv')
```

#### Weights / Schulwege

```{r}
travel_from_blks %>%
  rename(unit_id=BLK, entity_id=dst) %>%
  #gather(weight, value, -unit_id, -entity_id) %>%
  write_csv('app/data/weights.csv')
```

#### Zusätzliche Daten

```{r}
file.copy('download/RBS_OD_BEZ_2015_12.geojson', 'app/data/RBS_OD_BEZ_2015_12.geojson')
```

